We want to find out what factors lead to failure.
idle_duration_mins: the total duration (minutes) the vehicle was idling for the day
dpf_regen_inhibited_duration_mins: the total duration (minutes) where dpf regen was inhibited for the day (regen cannot occur even if it needs to)
dpf_regen_not_active_duration_mins: the total duration (minutes) where dpf regen was not active for the day (regen not taking place)
dpf_regen_needed_duration_mins: the total duration (minutes) where dpf regen was reported as being needed for the day (regen needed but is not taking place)
dpf_regen_inhibit_switch_active_duration_mins: the total duration (minutes) where the driver accessible dpf inhibit switch was active for the day (regen inhibited by the driver)
import pandas as pd
import matplotlib.pyplot as plt
import ast
from pandas.io.json import json_normalize
import seaborn as sns
import plotly.express as px
# Something to ensure that I can access the vnomics package
import sys
sys.path.insert(1, '../')
import vnomics
import plotly.offline as pyo
import plotly.graph_objs as go
import numpy as np
# Shared data directory, kept identical across the project's notebooks
path = '../data_2021/'
# ETL helper from the local vnomics package (imported above via the sys.path tweak)
etl = vnomics.etl()
# Load the daily data through the ETL helper
# NOTE(review): `daily` is not referenced again in the visible part of this notebook
daily = etl.read_daily()
# Daniel's data set, already modified by the ETL helper; this is the frame
# every later cell works on
data = etl.read_dan()
# Display the frame
data
| Days of Rest | dpf_regen_active_duration_mins | diagnostics_trouble_codes | duration_mins | date | Failure | platform_id | dpf_regen_needed_duration_mins | fuel_loss_gallons | dpf_regen_not_active_duration_mins | ... | dpf_regen_inhibit_switch_not_active_duration_mins | dpf_regen_inhibited_duration_mins | dpf_regen_not_inhibited_duration_mins | vehicle_serviced | idle_duration_mins | percent_idle | percent_fuel_lost | percent_regen_inhibited | percent_regen_needed | percent_regen_inhibit_switch_not_active | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 0.000000 | [] | 222.002017 | 2019-01-02 | 1 | 309450 | 0.00000 | 0.057875 | 154.504550 | ... | 0.0 | 145.853650 | 8.650900 | 0 | 44.141633 | 0.198834 | 0.007474 | 0.656992 | 0.000000 | 0.0 |
| 1 | 0.0 | 0.000000 | [] | 519.221017 | 2019-01-03 | 1 | 309450 | 0.00000 | 0.495596 | 345.015933 | ... | 0.0 | 293.039250 | 51.976683 | 0 | 134.957617 | 0.259923 | 0.015359 | 0.564382 | 0.000000 | 0.0 |
| 2 | 0.0 | 57.857333 | [{'j1939': {'severity': 'unclassified', 'fmi':... | 354.276533 | 2019-01-04 | 1 | 309450 | 46.78665 | 0.281884 | 41.469600 | ... | 0.0 | 115.026133 | 31.087450 | 1 | 83.660883 | 0.236146 | 0.018135 | 0.324679 | 0.132063 | 0.0 |
| 3 | 0.0 | 123.756800 | [] | 131.153333 | 2019-01-05 | 0 | 309450 | 2.96690 | 0.225600 | 0.000000 | ... | 0.0 | 111.522417 | 15.201283 | 0 | 30.391967 | 0.231729 | 0.016585 | 0.850321 | 0.022622 | 0.0 |
| 4 | 0.0 | 0.000000 | [] | 0.000000 | 2019-01-06 | 0 | 309450 | 0.00000 | 0.000000 | 0.000000 | ... | 0.0 | 0.000000 | 0.000000 | 0 | 0.000000 | 0.224759 | 0.054460 | 0.550190 | 0.007432 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 111697 | 0.0 | 35.528000 | [{'j1939': {'severity': 'unclassified', 'fmi':... | 841.246750 | 2020-12-01 | 0 | 10325986 | 14.93560 | 1.103217 | 786.525417 | ... | 0.0 | 753.899317 | 83.089700 | 0 | 64.949683 | 0.077206 | 0.010742 | 0.896169 | 0.017754 | 0.0 |
| 111698 | 0.0 | 0.000000 | [{'j1939': {'severity': 'unclassified', 'fmi':... | 1138.760183 | 2020-12-02 | 0 | 10325986 | 0.00000 | 1.101833 | 1117.560300 | ... | 0.0 | 1003.531917 | 114.028383 | 0 | 99.233983 | 0.087142 | 0.008377 | 0.881250 | 0.000000 | 0.0 |
| 111699 | 0.0 | 0.000000 | [{'j1939': {'severity': 'unclassified', 'fmi':... | 1134.956167 | 2020-12-03 | 0 | 10325986 | 0.00000 | 1.025153 | 1119.136317 | ... | 0.0 | 1008.166433 | 110.969883 | 0 | 87.670717 | 0.077246 | 0.007649 | 0.888287 | 0.000000 | 0.0 |
| 111700 | 0.0 | 0.000000 | [{'j1939': {'severity': 'unclassified', 'fmi':... | 798.424467 | 2020-12-04 | 0 | 10325986 | 0.00000 | 1.080527 | 791.055050 | ... | 0.0 | 686.138833 | 104.916217 | 0 | 75.487267 | 0.094545 | 0.011385 | 0.859366 | 0.000000 | 0.0 |
| 111701 | 0.0 | 0.000000 | [{'j1939': {'severity': 'unclassified', 'fmi':... | 132.229917 | 2020-12-05 | 0 | 10325986 | 0.00000 | 0.374806 | 123.104367 | ... | 0.0 | 100.253500 | 22.850867 | 0 | 20.191967 | 0.152703 | 0.026972 | 0.758176 | 0.000000 | 0.0 |
111702 rows × 23 columns
# Percentage columns drawn as one line each in the plots below
features = [
    'percent_idle',
    'percent_fuel_lost',
    'percent_regen_inhibited',
    'percent_regen_needed',
    'percent_regen_inhibit_switch_not_active',
]
def inspect(pid = 301117, day_window = 50):
    '''Plot the key feature percentages for the rows leading up to a failure.

    Finds the highest row index of ``data`` that belongs to the platform and
    is flagged ``Failure == 1``, selects that platform's rows whose index lies
    in the ``day_window`` positions immediately before it (the flagged row
    itself is excluded), and draws one line per entry of ``features``.

    Parameters
    ----------
    pid
        ``platform_id`` value to inspect.
    day_window
        Length of the index range, ending just before the last failure row,
        that is plotted.

    Raises
    ------
    ValueError
        If the platform has no row with ``Failure == 1``.

    Shows the figure via ``fig.show()``; nothing is returned.
    '''
    # Filter the platform once and reuse it
    platform_rows = data[data['platform_id'] == pid]
    failure_rows = platform_rows[platform_rows['Failure'] == 1]
    if failure_rows.empty:
        # Fail with a clear message instead of "max() arg is an empty sequence"
        raise ValueError(f"platform_id {pid!r} has no rows with Failure == 1")

    # Rightmost ID of the failure
    max_id = int(failure_rows.index.max())

    # Vectorised range test on the index; equivalent to checking membership in
    # range(max_id - day_window, max_id) row by row
    in_window = ((platform_rows.index >= max_id - day_window)
                 & (platform_rows.index < max_id))
    window = platform_rows[in_window]

    # Negative offsets -n .. -1, counted back from the failure row
    x_values = list(range(-window.shape[0], 0))
    d = [go.Scatter(x=x_values, y=window[feature]*100,
                    mode='lines+markers', name=feature)
         for feature in features]
    layout = go.Layout(title="Key Feature Characteristics Before A Failure",
                       xaxis=dict(tickangle=90,
                                  showticklabels=True,
                                  type="category",
                                  dtick=1),
                       hovermode='closest')
    fig = go.Figure(data=d, layout=layout)
    fig.update_layout(xaxis_title="Days From DPF Failure",
                      yaxis_title="Percentage")
    fig.show()
# Column totals per platform
agg_data = data.groupby("platform_id").sum()
# Platforms whose Failure flags add up to exactly 15 are treated as failing.
# NOTE(review): the original comment said "at least one failure"; a platform
# with any other non-zero total would end up in nonfailure_pids. Presumably
# each failure labels 15 rows -- confirm.
failure_pids = agg_data.index[(agg_data['Failure'] == 15).to_numpy()].tolist()
# Every other platform ID: all IDs minus the failing ones
nonfailure_pids = list(
    set(data['platform_id'].tolist()) - set(failure_pids)
)
# Show the aggregated rows of the failing platforms
agg_data.loc[agg_data['Failure'] == 15]
| Days of Rest | dpf_regen_active_duration_mins | duration_mins | Failure | dpf_regen_needed_duration_mins | fuel_loss_gallons | dpf_regen_not_active_duration_mins | dpf_regen_inhibit_switch_active_duration_mins | fuel_used_gallons | distance_miles | dpf_regen_inhibit_switch_not_active_duration_mins | dpf_regen_inhibited_duration_mins | dpf_regen_not_inhibited_duration_mins | vehicle_serviced | idle_duration_mins | percent_idle | percent_fuel_lost | percent_regen_inhibited | percent_regen_needed | percent_regen_inhibit_switch_not_active | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| platform_id | ||||||||||||||||||||
| 300490 | 95.0 | 24835.488150 | 118669.165883 | 15 | 0.083300 | 270.567451 | 52795.565967 | 0.0 | 9769.954558 | 64922.211905 | 0.0 | 68593.784250 | 9037.353167 | 1 | 24805.678200 | 104.515716 | 19.357676 | 246.339442 | 1.315628 | 0.0 |
| 301117 | 286.0 | 6524.362417 | 136857.425233 | 15 | 268.755683 | 775.526456 | 116268.492450 | 0.0 | 12802.508807 | 81254.110667 | 0.0 | 112676.369533 | 10385.241017 | 1 | 27079.820717 | 157.766891 | 44.846908 | 512.675504 | 2.915751 | 0.0 |
| 301475 | 293.0 | 37221.283333 | 182023.107583 | 15 | 0.066650 | 486.707360 | 119963.599167 | 0.0 | 14687.664523 | 93666.421351 | 0.0 | 140610.669067 | 16592.863267 | 1 | 42316.033100 | 182.610722 | 33.300880 | 478.027353 | 2.118926 | 0.0 |
| 301585 | 231.0 | 17577.756367 | 265926.834117 | 15 | 1398.933550 | 825.630585 | 216466.630583 | 0.0 | 28009.522445 | 193994.989119 | 0.0 | 213247.745550 | 22195.574950 | 1 | 44466.468700 | 157.254731 | 33.770370 | 508.347584 | 6.297137 | 0.0 |
| 301589 | 151.0 | 1985.789867 | 307483.787617 | 15 | 124.209650 | 1557.765304 | 180990.041683 | 0.0 | 30482.865557 | 214542.279727 | 0.0 | 167537.062650 | 15562.978550 | 1 | 38059.760300 | 114.637075 | 38.725355 | 406.492246 | 1.410947 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 10317538 | 142.0 | 3316.973683 | 413710.106800 | 15 | 317.067000 | 1314.155500 | 7422.700350 | 0.0 | 46888.234279 | 335828.552234 | 0.0 | 911.969917 | 10144.771117 | 1 | 44649.007200 | 105.289792 | 30.586682 | 82.163255 | 1.429294 | 0.0 |
| 10318391 | 148.0 | 4384.368833 | 433576.031783 | 15 | 1013.713267 | 2735.692035 | 20031.172467 | 0.0 | 52381.832942 | 302514.599755 | 0.0 | 703.946483 | 24725.308083 | 1 | 58446.448317 | 125.622654 | 46.526118 | 83.630746 | 2.952431 | 0.0 |
| 10318423 | 141.0 | 372.018133 | 478106.974600 | 15 | 0.000000 | 670.164883 | 22909.551550 | 0.0 | 58965.052584 | 417608.493371 | 0.0 | 323.767183 | 22957.802500 | 1 | 45624.800850 | 95.758578 | 16.701127 | 78.029735 | 1.047937 | 0.0 |
| 10321243 | 147.0 | 15347.384800 | 428134.642333 | 15 | 6558.774267 | 1735.176093 | 6678.812733 | 0.0 | 39206.367826 | 255128.809969 | 0.0 | 442.182083 | 28142.789717 | 1 | 79299.475417 | 150.010262 | 42.236834 | 80.603801 | 11.368452 | 0.0 |
| 10321244 | 170.0 | 8364.337250 | 345059.661183 | 15 | 2740.924183 | 1188.670825 | 8513.217133 | 0.0 | 29067.694985 | 197383.756979 | 0.0 | 515.385717 | 19103.092850 | 1 | 68885.414333 | 160.789661 | 39.945823 | 95.057296 | 6.085949 | 0.0 |
92 rows × 20 columns
# Display the list of failing platform IDs
failure_pids
[300490, 301117, 301475, 301585, 301589, 301682, 301842, 302054, 302230, 302332, 302576, 303094, 303104, 303148, 303576, 303752, 303757, 303909, 303959, 304104, 304333, 304355, 304493, 304885, 305391, 305712, 305945, 305958, 306479, 306527, 306571, 306890, 307112, 307241, 307422, 307602, 307630, 308049, 308292, 308374, 308609, 308766, 309011, 309057, 309094, 309164, 309265, 309405, 309450, 309607, 309642, 309774, 309984, 310110, 310175, 310176, 310201, 310231, 310280, 310299, 310506, 310929, 311109, 311645, 311976, 312153, 312545, 312738, 312753, 312831, 312834, 312890, 312958, 313154, 313183, 10312705, 10312983, 10312984, 10314364, 10314451, 10314820, 10315841, 10315859, 10315904, 10315950, 10315963, 10316097, 10317538, 10318391, 10318423, 10321243, 10321244]
# platform_id of every row (one entry per row, so IDs repeat)
all_pids = data['platform_id'].tolist()
# Draw the pre-failure feature plot for each failing platform
for pid in failure_pids:
    inspect(pid)
# Plot only the rows of platform 309450 that are flagged Failure == 1
window = data.loc[(data['Failure'] == 1) & (data['platform_id'] == 309450)]
# Negative offsets -n .. -1, one per selected row
x_values = list(range(-len(window), 0))
# One trace per feature, scaled from fraction to percent
d = [
    go.Scatter(x=x_values, y=window[feature] * 100,
               mode='lines+markers', name=feature)
    for feature in features
]
layout = go.Layout(
    title="Key Feature Characteristics Before A Failure",
    xaxis={
        'tickangle': 90,
        'showticklabels': True,
        'type': "category",
        'dtick': 1,
    },
)
fig = go.Figure(data=d, layout=layout)
fig.update_layout(
    xaxis_title="Days From DPF Failure",
    yaxis_title="Percentage",
)
fig.show()
# Per-platform column totals (this cell sums; it does not compute means)
data.groupby("platform_id").sum()
| dpf_regen_not_active_duration_mins | dpf_regen_inhibited_duration_mins | dpf_regen_needed_duration_mins | distance_miles | duration_mins | dpf_regen_active_duration_mins | Days of Rest | Failure | vehicle_serviced | fuel_used_gallons | fuel_loss_gallons | dpf_regen_inhibit_switch_not_active_duration_mins | dpf_regen_inhibit_switch_active_duration_mins | idle_duration_mins | dpf_regen_not_inhibited_duration_mins | percent_idle | percent_fuel_lost | percent_regen_inhibited | percent_regen_needed | percent_regen_inhibit_switch_not_active | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| platform_id | ||||||||||||||||||||
| 300490 | 52795.565967 | 68593.784250 | 0.083300 | 64922.211905 | 118669.165883 | 24835.488150 | 95.0 | 15 | 1 | 9769.954558 | 270.567451 | 0.0 | 0.0 | 24805.678200 | 9037.353167 | 104.515716 | 19.357676 | 246.339442 | 1.315628 | 0.0 |
| 300498 | 96969.858633 | 130989.934800 | 0.016683 | 72036.677140 | 210527.153133 | 48728.542100 | 199.0 | 0 | 0 | 12173.779759 | 440.087782 | 0.0 | 0.0 | 57448.345700 | 14708.482617 | 217.246941 | 45.739961 | 420.925530 | 1.471590 | 0.0 |
| 300500 | 190573.704733 | 170367.183433 | 171.687067 | 153831.353036 | 242681.586117 | 3189.995317 | 190.0 | 0 | 0 | 23135.475040 | 1942.188000 | 0.0 | 0.0 | 52316.853300 | 23595.905983 | 175.937673 | 63.160938 | 444.932722 | 1.758131 | 0.0 |
| 300547 | 83429.380067 | 111500.618517 | 0.066717 | 66456.899680 | 222993.562700 | 51337.332867 | 164.0 | 0 | 0 | 10080.268953 | 756.511727 | 0.0 | 0.0 | 71552.154533 | 23280.864783 | 234.359243 | 62.780458 | 350.733062 | 1.204104 | 0.0 |
| 300558 | 157773.572100 | 178694.743817 | 19911.609933 | 46831.911267 | 257093.675733 | 34942.665800 | 226.0 | 0 | 0 | 7337.470222 | 1052.227469 | 0.0 | 0.0 | 93820.835083 | 33933.120633 | 242.959606 | 91.863273 | 469.560398 | 39.374394 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 10321243 | 6678.812733 | 442.182083 | 6558.774267 | 255128.809969 | 428134.642333 | 15347.384800 | 147.0 | 15 | 1 | 39206.367826 | 1735.176093 | 0.0 | 0.0 | 79299.475417 | 28142.789717 | 150.010262 | 42.236834 | 80.603801 | 11.368452 | 0.0 |
| 10321244 | 8513.217133 | 515.385717 | 2740.924183 | 197383.756979 | 345059.661183 | 8364.337250 | 170.0 | 15 | 1 | 29067.694985 | 1188.670825 | 0.0 | 0.0 | 68885.414333 | 19103.092850 | 160.789661 | 39.945823 | 95.057296 | 6.085949 | 0.0 |
| 10325986 | 59482.257467 | 51582.150917 | 73.831700 | 52060.436103 | 61031.917417 | 579.533983 | 18.0 | 0 | 0 | 6916.199822 | 49.876596 | 0.0 | 0.0 | 6282.169767 | 8553.472233 | 13.452618 | 2.108056 | 70.930969 | 0.246390 | 0.0 |
| 10326022 | 48800.171867 | 37768.769100 | 41.680083 | 34765.475441 | 51067.205383 | 468.292533 | 17.0 | 0 | 0 | 4425.870343 | 76.931862 | 0.0 | 0.0 | 8357.308183 | 11541.375383 | 17.136225 | 2.346525 | 62.608393 | 0.172065 | 0.0 |
| 10326079 | 4048.425717 | 2666.307117 | 28.641517 | 1859.696106 | 4925.186617 | 176.673417 | 30.0 | 0 | 0 | 246.015152 | 8.854718 | 0.0 | 0.0 | 1407.183483 | 1587.433533 | 11.980568 | 2.314394 | 26.061929 | 0.310935 | 0.0 |
161 rows × 20 columns
# Display the full frame again
data
| platform_id | diagnostics_trouble_codes | dpf_regen_not_active_duration_mins | dpf_regen_inhibited_duration_mins | dpf_regen_needed_duration_mins | distance_miles | duration_mins | dpf_regen_active_duration_mins | Days of Rest | Failure | ... | fuel_loss_gallons | dpf_regen_inhibit_switch_not_active_duration_mins | dpf_regen_inhibit_switch_active_duration_mins | idle_duration_mins | dpf_regen_not_inhibited_duration_mins | percent_idle | percent_fuel_lost | percent_regen_inhibited | percent_regen_needed | percent_regen_inhibit_switch_not_active | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 309450 | [] | 154.504550 | 145.853650 | 0.00000 | 65.294602 | 222.002017 | 0.000000 | 0.0 | 1 | ... | 0.057875 | 0.0 | 0.0 | 44.141633 | 8.650900 | 0.198834 | 0.007474 | 0.656992 | 0.000000 | 0.0 |
| 1 | 309450 | [] | 345.015933 | 293.039250 | 0.00000 | 210.269609 | 519.221017 | 0.000000 | 0.0 | 1 | ... | 0.495596 | 0.0 | 0.0 | 134.957617 | 51.976683 | 0.259923 | 0.015359 | 0.564382 | 0.000000 | 0.0 |
| 2 | 309450 | [{'j1939': {'severity': 'unclassified', 'fmi':... | 41.469600 | 115.026133 | 46.78665 | 98.413486 | 354.276533 | 57.857333 | 0.0 | 1 | ... | 0.281884 | 0.0 | 0.0 | 83.660883 | 31.087450 | 0.236146 | 0.018135 | 0.324679 | 0.132063 | 0.0 |
| 3 | 309450 | [] | 0.000000 | 111.522417 | 2.96690 | 74.838556 | 131.153333 | 123.756800 | 0.0 | 0 | ... | 0.225600 | 0.0 | 0.0 | 30.391967 | 15.201283 | 0.231729 | 0.016585 | 0.850321 | 0.022622 | 0.0 |
| 4 | 309450 | [] | 0.000000 | 0.000000 | 0.00000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0 | ... | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.224759 | 0.054460 | 0.550190 | 0.007432 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 111697 | 10325986 | [{'j1939': {'severity': 'unclassified', 'fmi':... | 786.525417 | 753.899317 | 14.93560 | 776.873155 | 841.246750 | 35.528000 | 0.0 | 0 | ... | 1.103217 | 0.0 | 0.0 | 64.949683 | 83.089700 | 0.077206 | 0.010742 | 0.896169 | 0.017754 | 0.0 |
| 111698 | 10325986 | [{'j1939': {'severity': 'unclassified', 'fmi':... | 1117.560300 | 1003.531917 | 0.00000 | 1035.033122 | 1138.760183 | 0.000000 | 0.0 | 0 | ... | 1.101833 | 0.0 | 0.0 | 99.233983 | 114.028383 | 0.087142 | 0.008377 | 0.881250 | 0.000000 | 0.0 |
| 111699 | 10325986 | [{'j1939': {'severity': 'unclassified', 'fmi':... | 1119.136317 | 1008.166433 | 0.00000 | 1035.683687 | 1134.956167 | 0.000000 | 0.0 | 0 | ... | 1.025153 | 0.0 | 0.0 | 87.670717 | 110.969883 | 0.077246 | 0.007649 | 0.888287 | 0.000000 | 0.0 |
| 111700 | 10325986 | [{'j1939': {'severity': 'unclassified', 'fmi':... | 791.055050 | 686.138833 | 0.00000 | 694.809906 | 798.424467 | 0.000000 | 0.0 | 0 | ... | 1.080527 | 0.0 | 0.0 | 75.487267 | 104.916217 | 0.094545 | 0.011385 | 0.859366 | 0.000000 | 0.0 |
| 111701 | 10325986 | [{'j1939': {'severity': 'unclassified', 'fmi':... | 123.104367 | 100.253500 | 0.00000 | 95.052341 | 132.229917 | 0.000000 | 0.0 | 0 | ... | 0.374806 | 0.0 | 0.0 | 20.191967 | 22.850867 | 0.152703 | 0.026972 | 0.758176 | 0.000000 | 0.0 |
111702 rows × 23 columns
# Seeing Line for Each Feature = Failure vs Non Failure
# Values of the first entry in `features`
# NOTE(review): `x` is not used again in the visible part of this notebook
x = data[features[0]]
# Number of rows recorded for each platform_id
pid_group = data.groupby('platform_id').size()
# Display the per-platform row counts
pid_group
platform_id
300490 441
300498 729
300500 715
300547 729
300558 729
...
10321243 729
10321244 727
10325986 96
10326022 92
10326079 48
Length: 161, dtype: int64
Here we see that different platform IDs have different numbers of rows (one row per day of data).
The minimum number of rows for any platform is 48, but most platforms have 729.
It is nice to see that most platforms have exactly 729 rows.
# Distinct per-platform row counts, in ascending order
counts = sorted(pid_group.unique().tolist())
counts
[48, 92, 96, 268, 285, 302, 309, 352, 394, 400, 441, 560, 570, 665, 679, 710, 713, 715, 716, 717, 718, 722, 723, 725, 727, 728, 729, 730]
# Adding up how many platforms share each row count gives the number of platforms
sum(pid_group.value_counts().values)
161
We have a total of 161 platforms in the dataset.
We want to make sure we have enough data before the failure happens - so no failure within the first 15 days.
We may have to aggregate the dates to make the data less granular. We may also need some windowing: take 15 rows at a time and feed them into TSFresh. For each batch of 15 rows, we are trying to make a prediction.
For each of the 69 trucks that don't have DPF failure, how much time series data do you want to feed? This is where we want to do that windowing.
To have a standard number of time series windows, I drop the platform IDs that do not have exactly 729 rows (the most common count). I keep the result as a separate dataset.
def standardize_window(raw_data, window_size=729):
    '''Keep only the platforms that have exactly ``window_size`` rows.

    Parameters
    ----------
    raw_data
        DataFrame with a ``platform_id`` column; it is not modified.
    window_size
        Number of rows a platform must have to be kept (default 729).
        Platforms with fewer *or more* rows are dropped.

    Returns
    -------
    DataFrame
        The rows of ``raw_data`` that belong to the kept platforms.

    Prints the number of platforms before and after filtering.
    '''
    # Row count per platform, taken from the argument rather than a global
    pid_group = raw_data.groupby('platform_id').size()
    # Platform IDs whose row count matches the requested window size
    full_pids = pid_group[pid_group == window_size].index
    # Sanity check that a subset of the platforms is kept
    print(f"Before: {len(pid_group)} platforms \nAfter: {len(full_pids)} platforms")
    # Vectorised membership test instead of a per-row list scan
    fullwindow_data = raw_data[raw_data['platform_id'].isin(full_pids)]
    return fullwindow_data
# Filter down to the platforms with a full window of rows.
# NOTE(review): `all_data` is not defined in the visible part of this
# notebook -- confirm it is loaded in an earlier cell.
fullwindow_data = standardize_window(all_data)
# Preview the first rows of the filtered frame
fullwindow_data.head()
Before: 112 rows After: 161 rows
| date | platform_id | distance_miles | fuel_used_gallons | duration_mins | idle_duration_mins | fuel_loss_gallons | dpf_regen_inhibited_duration_mins | dpf_regen_not_inhibited_duration_mins | dpf_regen_not_active_duration_mins | dpf_regen_active_duration_mins | dpf_regen_needed_duration_mins | dpf_regen_inhibit_switch_not_active_duration_mins | dpf_regen_inhibit_switch_active_duration_mins | vehicle_serviced | diagnostics_trouble_codes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2019-01-02 | 309450 | 65.294602 | 7.743177 | 222.002017 | 44.141633 | 0.057875 | 145.853650 | 8.650900 | 154.504550 | 0.000000 | 0.00000 | 0.0 | 0.0 | 0 | [] |
| 1 | 2019-01-03 | 309450 | 210.269609 | 32.266755 | 519.221017 | 134.957617 | 0.495596 | 293.039250 | 51.976683 | 345.015933 | 0.000000 | 0.00000 | 0.0 | 0.0 | 0 | [] |
| 2 | 2019-01-04 | 309450 | 98.413486 | 15.543700 | 354.276533 | 83.660883 | 0.281884 | 115.026133 | 31.087450 | 41.469600 | 57.857333 | 46.78665 | 0.0 | 0.0 | 1 | [{'j1939': {'severity': 'unclassified', 'fmi':... |
| 3 | 2019-01-05 | 309450 | 74.838556 | 13.602637 | 131.153333 | 30.391967 | 0.225600 | 111.522417 | 15.201283 | 0.000000 | 123.756800 | 2.96690 | 0.0 | 0.0 | 0 | [] |
| 4 | 2019-01-06 | 309450 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.00000 | 0.0 | 0.0 | 0 | [] |
# Saving the filtered frame next to the other data files
# (to_csv writes the index as the first column by default)
fullwindow_data.to_csv(path + 'fullwindow_data.csv')
# To show the difference between this and the initial data
all_data.shape
(111702, 16)
# Pairwise correlation between the columns of `data`
corr = data.corr()
# Upper triangle of the correlation matrix (diagonal included), zeros below;
# passed as the heatmap mask so the non-zero upper-triangle cells are hidden
matrix = np.triu(corr)
sns.heatmap(corr, cmap = "coolwarm", mask=matrix);
# Unmasked correlation heatmap of `all_data` on a new 10x8 figure
plt.figure(figsize=(10,8))
sns.heatmap(all_data.corr(), cmap = 'coolwarm');